#################################################
# Function 1: add_controls_to_final_levels_dat #
################################################

# Inputs:
#   dat: The tibble that includes the baseline data you want to add controls to
#   sy_native_pop_ratio: The tibble that includes control for sy_native_ratio_yr
#   residualize_intensive: The tibble that includes control for share_natives_on_fb
#   residualize_extensive: The tibble the includes a control for indiv_l1080_cntrl

# Outputs: A tibble with these data merged together by NUTS3 X Quarter

add_controls_to_final_levels_dat <- function(dat, sy_native_pop_ratio, residualize_intensive, residualize_extensive){

  dat %>%
    left_join(residualize_intensive, by=c("loc1"="nuts3")) %>%
    left_join(residualize_extensive, by=c("loc1"="nuts3")) %>%
    left_join(sy_native_pop_ratio, by=c("loc1"="nuts3", "first_quarter_period1"="quarter")) %>%
    rename(
      indiv_l1080_cntrl1 = indiv_l1080_cntrl,
      share_natives_on_fb1 = share_natives_on_fb,
      sy_native_ratio_yr1 = sy_native_ratio_yr) %>%
    left_join(residualize_intensive, by=c("loc2"="nuts3")) %>%
    left_join(residualize_extensive, by=c("loc2"="nuts3")) %>%
    left_join(sy_native_pop_ratio, by=c("loc2"="nuts3", "first_quarter_period2"="quarter")) %>%
    rename(
      indiv_l1080_cntrl2 = indiv_l1080_cntrl,
      share_natives_on_fb2 = share_natives_on_fb,
      sy_native_ratio_yr2 = sy_native_ratio_yr)
}



###########################################
# Function 2: residualize_on_native_usage #
###########################################

# This function residualizes each component of our movers' plots on the extensive and intensive usage controls
# and adds back the mean.

# Inputs:
#   dat: The tibble that includes the baseline data
#   [pre,post]_lhs_to_use: A vector with the strings of variables to be residualized

# Outputs: A tibble with the residualized versions of the lhs variables

residualize_on_native_usage <- function(dat, pre_lhs_to_use, post_lhs_to_use){

  # Loop through all LHS
  for(curr_lhs in pre_lhs_to_use){
    dat[[paste0(curr_lhs, "_resid")]] <-
      mean(dat[[curr_lhs]]) +
      resid(lm(formula=paste(curr_lhs, "indiv_l1080_cntrl1 + share_natives_on_fb1", sep=" ~ "), data=dat))
  }
  for(curr_lhs in post_lhs_to_use){
    dat[[paste0(curr_lhs, "_resid")]] <-
      mean(dat[[curr_lhs]]) +
      resid(lm(formula=paste(curr_lhs, "indiv_l1080_cntrl2 + share_natives_on_fb2", sep=" ~ "), data=dat))
  }

  return(dat)

}



#########################################
# Function 3: make_movers_summary_table #
#########################################

# Inputs:
#   dat: The tibble that includes the final dataset
#   ntiles_dat: The tibble the includes whether a NUTS3 is below (1) or above (2) the median
#   outcome_indiv: The primary individual-level outcome
#   outcome_avg: The primary (analagous) regional-level outcome

# Outputs: A tibble that summarizes movers and their matches
#             above and below median by DESTINATION.

make_movers_summary_table <- function(dat, ntiles_dat, outcome_indiv, outcome_avg){

  dat <- dat %>%
    left_join(ntiles_dat, by=c("loc2"="nuts3")) %>%
    rename(nuts3_dest_ntile = nuts3_ntile)

  ## Movers -- Pre ##
  movers_pre_all <- dat %>%
    summarise(
      pct_female = mean(is_female) * 100,
      avg_age = mean(age),
      avg_qs_de = mean(qrtrs_de),
      n_30d = mean(pre_n_30d),
      avg_produ_in_de = mean(pre_produ_content_de_in_quarter),
      outcome = mean(!!sym(outcome_indiv)),
      n=n()) %>%
    ungroup %>%
    t()

  movers_pre_split <- dat %>%
    group_by(nuts3_dest_ntile) %>%
    summarise(
      pct_female = mean(is_female) * 100,
      avg_age = mean(age),
      avg_qs_de = mean(qrtrs_de),
      n_30d = mean(pre_n_30d),
      avg_produ_in_de = mean(pre_produ_content_de_in_quarter),
      outcome = mean(!!sym(outcome_indiv)),
      n=n()) %>%
    ungroup

  movers_pre_bottom <- movers_pre_split %>%
    filter(nuts3_dest_ntile == 1) %>%
    select(-nuts3_dest_ntile) %>% t()

  movers_pre_top <- movers_pre_split %>%
    filter(nuts3_dest_ntile == 2) %>%
    select(-nuts3_dest_ntile) %>% t()

  ## Non-Movers Matched -- Pre ##
  matched_pre_all <- dat %>%
    summarise(
      pct_female = mean(pre_share_female) * 100,
      avg_age = mean(pre_avg_age),
      avg_qs_de = mean(pre_avg_qrtrs_de),
      n_30d = mean(pre_avg_n_30d),
      avg_produ_in_de_in_quarter = mean(pre_avg_produ_content_de_in_quarter),
      outcome = mean(!!sym(outcome_avg)),
      n = n()) %>%
    ungroup() %>%
    t()

  matched_pre_split <- dat %>%
    group_by(nuts3_dest_ntile) %>%
    summarise(
      pct_female = mean(pre_share_female) * 100,
      avg_age = mean(pre_avg_age),
      avg_qs_de = mean(pre_avg_qrtrs_de),
      n_30d = mean(pre_avg_n_30d),
      avg_produ_in_de_in_quarter = mean(pre_avg_produ_content_de_in_quarter),
      outcome = mean(!!sym(outcome_avg)),
      n = n()) %>%
    ungroup()

  matched_pre_bottom <- matched_pre_split %>%
    filter(nuts3_dest_ntile == 1) %>%
    select(-nuts3_dest_ntile) %>% t()

  matched_pre_top <- matched_pre_split %>%
    filter(nuts3_dest_ntile == 2) %>%
    select(-nuts3_dest_ntile) %>% t()

  ## Combine in final tibble ##
  tibble(outcome=c("share_female", "avg_age", "avg_qs_de", "n_any_frnd",
                   "avg_produ_in_de_in_quarter", "outcome", "N")) %>%
    bind_cols(
      tibble(
        movers_pre_all = movers_pre_all[,1],
        matched_pre_all = matched_pre_all[,1],
        movers_pre_bottom = movers_pre_bottom[,1],
        matched_pre_bottom = matched_pre_bottom[,1],
        movers_pre_top = movers_pre_top[,1],
        matched_pre_top = matched_pre_top[,1]
      ))
}



##########################################
# Function 4: make_heterogeneity_graphs #
#########################################

# Inputs:
#   dat: The tibble that includes the data you want to create the plot from
#   formula_string: The formula for the regression you want to run over different samples

# Outputs: A table with the data for the plot of interest (the slope over various samples)

make_heterogeneity_graphs <- function(dat, formula_string){

  est1 <- felm(formula=as.formula(formula_string), data = dat)
  est2 <- felm(formula=as.formula(formula_string), data = filter(dat, is_female == 1))
  est3 <- felm(formula=as.formula(formula_string), data = filter(dat, is_female == 0))
  est4 <- felm(formula=as.formula(formula_string),data = filter(dat, age_grp == 1))
  est5 <- felm(formula=as.formula(formula_string), data = filter(dat, age_grp == 2))
  est6 <- felm(formula=as.formula(formula_string), data = filter(dat, age_grp == 3))

  tibble(
    groups = c(
      "All",
      "Female", "Male",
      "Age <30", "Age 30-39", "Age 40+"
    ),
    group_group = c(
      "All",
      "Gender", "Gender",
      "Age", "Age", "Age"
    ),
    coeffs = c(
      est1$coefficients[[1]],
      est2$coefficients[[1]], est3$coefficients[[1]],
      est4$coefficients[[1]], est5$coefficients[[1]], est6$coefficients[[1]]
    ),
    ses = c(
      est1$se[[1]],
      est2$se[[1]], est3$se[[1]],
      est4$se[[1]], est5$se[[1]], est6$se[[1]]
    ),
    n = c(
      est1$N,
      est2$N, est3$N,
      est4$N, est5$N, est6$N
    )
  )

}